#set working directory
setwd("C:/Users/Aashi/Desktop/DGE")
##loading library
# load the libraries
library(DESeq2)
library(apeglm)
library(ggplot2)
library(pheatmap)
library(cowplot)
library(hrbrthemes)
library(viridis)
library(plotly)
library(dplyr)
library(tidyverse)
library(org.Hs.eg.db)
library(fgsea)
library("msigdbr")
###DGE
# DGE
##loading count matrix
gse = read.csv("GSE92945_Fibroblast_RNAseq_counts_1.csv", header = TRUE, row.names = 1)
info = read.table("ind.txt", header = T,sep ='\t')
##creating a data matrix
dds = DESeqDataSetFromMatrix(round(gse), info, ~conditions)
##removing lowly expressed genes
keep = rowSums(counts(dds)) >= 10
dds = dds[keep,]
##main deseq
ddsDE = DESeq(dds)
##export normalized read counts
normCOunts = counts(ddsDE, normalized = T)
write.csv(normCOunts,"normalised_data.csv")
##DESEQ results
res = results(ddsDE, alpha = 0.05)
##output DESeq result
resOrdered = res[order(res$padj),]
write.csv(resOrdered, "resOrdered1.csv")
#plotting
# loading and preparing data for plotting
##pca and disp(plots)
vsat = vst(ddsDE, blind = FALSE)
pca = plotPCA(vsat, intgroup = "conditions")
disp = plotDispEsts(ddsDE)
ggplotly(pca)
##load_the_deseq2_results_data_and_metadata
normCOunt = read.csv('normalised_data.csv', row.names = 1)
deSeqRes = read.csv('resOrdered.csv',row.names = 1)
deSeqRes$sig =ifelse(deSeqRes$padj <= 0.05, 'yes', 'no')
deSeqRes = na.omit(deSeqRes)
##ggplots
# ggplots
##ma_plot
maplot = ggplot(deSeqRes, aes(x = log10(baseMean), y = log2FoldChange, color = sig)) + geom_point(alpha = 0.7)+scale_size(range = c(1.4, 19), name = "MA PLOT")+ scale_color_viridis(discrete = TRUE, guide=FALSE)+theme_ipsum()+theme(legend.position = "none")
##volcano plot
volcano = ggplot(deSeqRes, aes(x = log2FoldChange, y = -log10(padj), color = sig)) + geom_point(alpha = 0.7)+scale_size(range = c(1.9, 20), name = "MA PLOT")+ scale_color_viridis(discrete = TRUE, guide=FALSE)+theme_ipsum()+theme(legend.position = "none")
volcano = volcano + ylim(0,15)
##pheatmap
signi = subset(deSeqRes, padj <= 0.05)
allsig= merge(normCOunt, signi, by = 0)
sigcounts = allsig[,2:11]
row.names(sigcounts) = allsig$Row.names
pheat = pheatmap(log2(sigcounts + 1), scale = 'row', show_rownames = F, treeheight_row = 0, treeheight_col = 0)
###cowplot
plot_grid(volcano,pheat[[4]])
plot_grid(maplot,volcano)
###interactive plot using ggplotly
ggplotly(maplot, tooltip = "text")
ggplotly(volcano)
#GSEA
```r
# GSEA
rest <- read_csv("resOrdered1.csv")
head(rest)
## # A tibble: 6 x 7
## ...1 baseMean log2FoldChange lfcSE stat pvalue padj
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ENSG00000189223 756. 4.24 0.533 7.96 1.77e-15 1.64e-11
## 2 ENSG00000198848 1017. -10.1 1.26 -7.98 1.41e-15 1.64e-11
## 3 ENSG00000180155 1057. -3.08 0.398 -7.75 9.11e-15 3.55e-11
## 4 ENSG00000125730 8568. -8.43 1.09 -7.74 9.60e-15 3.55e-11
## 5 ENSG00000243137 6824. -8.62 1.11 -7.76 8.56e-15 3.55e-11
## 6 ENSG00000133048 10922. -11.8 1.55 -7.64 2.23e-14 6.89e-11
rest = rest %>%
rename(gene = ...1 )
##creating a mapping table
library(org.Hs.eg.db)
ens2symbol = AnnotationDbi::select(org.Hs.eg.db,keys = rest$gene,
columns="SYMBOL",
keytype="ENSEMBL")
ens2symbol <- as_tibble(ens2symbol)
view(ens2symbol)
rest <- inner_join(rest, ens2symbol, by=c("gene"="ENSEMBL"))
rest
## # A tibble: 24,124 x 8
## gene baseMean log2FoldChange lfcSE stat pvalue padj SYMBOL
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 ENSG00000189223 756. 4.24 0.533 7.96 1.77e-15 1.64e-11 PAX8-A~
## 2 ENSG00000198848 1017. -10.1 1.26 -7.98 1.41e-15 1.64e-11 CES1
## 3 ENSG00000180155 1057. -3.08 0.398 -7.75 9.11e-15 3.55e-11 LYNX1
## 4 ENSG00000125730 8568. -8.43 1.09 -7.74 9.60e-15 3.55e-11 C3
## 5 ENSG00000243137 6824. -8.62 1.11 -7.76 8.56e-15 3.55e-11 PSG4
## 6 ENSG00000133048 10922. -11.8 1.55 -7.64 2.23e-14 6.89e-11 CHI3L1
## 7 ENSG00000106018 802. -9.12 1.20 -7.58 3.43e-14 9.06e-11 VIPR2
## 8 ENSG00000114805 337. -8.70 1.17 -7.43 1.08e-13 2.50e-10 PLCH1
## 9 ENSG00000162654 1180. -7.96 1.10 -7.21 5.43e-13 1.12e- 9 GBP4
## 10 ENSG00000168675 639. -9.73 1.36 -7.15 8.91e-13 1.65e- 9 LDLRAD4
## # ... with 24,114 more rows
res2 <- rest %>%
dplyr::select(SYMBOL, stat) %>%
na.omit() %>%
distinct() %>%
group_by(SYMBOL) %>%
summarize(stat=mean(stat))
res2
## # A tibble: 18,302 x 2
## SYMBOL stat
## <chr> <dbl>
## 1 A1BG 1.85
## 2 A1BG-AS1 -1.93
## 3 A2M -3.50
## 4 A2M-AS1 -2.30
## 5 A3GALT2 -1.43
## 6 A4GALT -2.17
## 7 AAAS 0.0828
## 8 AACS -0.877
## 9 AADAC 0.838
## 10 AADACL4 -0.769
## # ... with 18,292 more rows
ranks <- deframe(res2)
head(ranks, 20)
## A1BG A1BG-AS1 A2M A2M-AS1 A3GALT2 A4GALT
## 1.84980065 -1.93257957 -3.49846494 -2.30188355 -1.42653887 -2.16739135
## AAAS AACS AADAC AADACL4 AADACP1 AADAT
## 0.08283153 -0.87690060 0.83765433 -0.76913555 -0.21984127 0.26320865
## AAGAB AAK1 AAMDC AAMP AANAT AAR2
## 0.84319947 1.06627871 1.05496098 -0.01067306 1.34247123 0.72511821
## AARD AARS1
## -2.84723813 -0.30592030
####loading reference enrichment database
Hs.GOBP <- msigdbr(species = "Homo sapiens", category = "C5", subcategory = "BP")
Hs.Reactome <- msigdbr(species = "Homo sapiens", category = "C2", subcategory = "CP:REACTOME")
Hs.Hallmark <- msigdbr(species = "Homo sapiens", category = "H")
Hs.GOBP.Entrez <- Hs.GOBP %>% split(x = .$entrez_gene, f = .$gs_name)
Hs.Hallmark.Entrez <- Hs.Hallmark %>% split(x = .$entrez_gene, f = .$gs_name)
Hs.GOBP.Symbol <- Hs.GOBP %>% split(x = .$gene_symbol, f = .$gs_name)
Hs.Hallmark.Symbol <- Hs.Hallmark %>% split(x = .$gene_symbol, f = .$gs_name)
#####Now that we have everything lets run fgsea on it
fgseaRes.Hallmark <- fgsea::fgseaMultilevel(pathways=Hs.Hallmark.Symbol,stats = ranks, eps =0)
GSEA but prettier
# Make the Results tidy
fgseaResTidy <- fgseaRes.Hallmark %>%
as_tibble() %>%
arrange(desc(NES))
fgseaResTidy %>%
dplyr::select(-leadingEdge, -ES) %>%
arrange(padj) %>%
DT::datatable()
head(fgseaResTidy,10)
## # A tibble: 10 x 8
## pathway pval padj log2err ES NES size leadi~1
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <int> <list>
## 1 HALLMARK_E2F_TARGETS 3.62e-55 1.81e-53 1.94 0.737 3.60 200 <chr>
## 2 HALLMARK_MYC_TARGETS_V1 2.03e-47 5.07e-46 1.80 0.712 3.48 199 <chr>
## 3 HALLMARK_G2M_CHECKPOINT 1.36e-36 2.27e-35 1.58 0.659 3.22 199 <chr>
## 4 HALLMARK_MYC_TARGETS_V2 5.52e-14 3.45e-13 0.965 0.704 2.78 58 <chr>
## 5 HALLMARK_OXIDATIVE_PHOSP~ 7.08e-17 5.90e-16 1.06 0.513 2.51 200 <chr>
## 6 HALLMARK_MTORC1_SIGNALING 4.14e-15 2.96e-14 0.997 0.498 2.43 199 <chr>
## 7 HALLMARK_UNFOLDED_PROTEI~ 4.25e-11 2.12e-10 0.851 0.537 2.41 112 <chr>
## 8 HALLMARK_DNA_REPAIR 1.66e-11 9.23e-11 0.863 0.498 2.34 149 <chr>
## 9 HALLMARK_GLYCOLYSIS 1.87e- 8 8.49e- 8 0.734 0.421 2.06 191 <chr>
## 10 HALLMARK_MITOTIC_SPINDLE 2.59e- 7 9.97e- 7 0.675 0.392 1.92 197 <chr>
## # ... with abbreviated variable name 1: leadingEdge
##plotting_the_results
gea_plot = ggplot(fgseaResTidy, aes(reorder(pathway, NES), NES)) +
geom_col(aes(fill=padj<0.05)) +
coord_flip() +
labs(x="Pathway", y="Normalized Enrichment Score",
title="Hallmark pathways NES from GSEA") +
theme_minimal()
ggplotly(gea_plot)
#playing around with different pathways
# trying_out_in_different_pathways
##kegg
kegg = fgsea(pathways=gmtPathways("c2.cp.kegg.v6.2.symbols.gmt"), ranks) %>%
as_tibble() %>%
arrange(padj)%>%
DT::datatable()
##go
go = fgsea(pathways=gmtPathways("c5.all.v6.2.symbols.gmt"), ranks) %>%
as_tibble()%>%
arrange(padj)%>%
DT::datatable()
##miR
miR = fgsea(pathways=gmtPathways("c3.mir.v6.2.symbols.gmt"), ranks) %>%
as_tibble()%>%
arrange(padj)%>%
DT::datatable()
sessionInfo()
## R version 4.1.1 (2021-08-10)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 7 x64 (build 7601) Service Pack 1
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=English_United States.1252
## [2] LC_CTYPE=English_United States.1252
## [3] LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United States.1252
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] msigdbr_7.5.1 fgsea_1.20.0
## [3] org.Hs.eg.db_3.14.0 AnnotationDbi_1.56.2
## [5] forcats_0.5.2 stringr_1.5.0
## [7] purrr_0.3.5 readr_2.1.3
## [9] tidyr_1.2.1 tibble_3.1.8
## [11] tidyverse_1.3.2 dplyr_1.0.10
## [13] plotly_4.10.1 viridis_0.6.2
## [15] viridisLite_0.4.1 hrbrthemes_0.8.0
## [17] cowplot_1.1.1 pheatmap_1.0.12
## [19] ggplot2_3.4.0 apeglm_1.16.0
## [21] DESeq2_1.34.0 SummarizedExperiment_1.24.0
## [23] Biobase_2.54.0 MatrixGenerics_1.6.0
## [25] matrixStats_0.63.0 GenomicRanges_1.46.1
## [27] GenomeInfoDb_1.30.1 IRanges_2.28.0
## [29] S4Vectors_0.32.4 BiocGenerics_0.40.0
##
## loaded via a namespace (and not attached):
## [1] snow_0.4-4 readxl_1.4.1 backports_1.4.1
## [4] fastmatch_1.1-3 systemfonts_1.0.4 plyr_1.8.8
## [7] lazyeval_0.2.2 splines_4.1.1 crosstalk_1.2.0
## [10] BiocParallel_1.28.3 digest_0.6.31 htmltools_0.5.4
## [13] fansi_1.0.3 magrittr_2.0.3 memoise_2.0.1
## [16] googlesheets4_1.0.1 tzdb_0.3.0 Biostrings_2.62.0
## [19] annotate_1.72.0 modelr_0.1.10 extrafont_0.18
## [22] vroom_1.6.0 extrafontdb_1.0 bdsmatrix_1.3-6
## [25] timechange_0.1.1 colorspace_2.0-3 blob_1.2.3
## [28] rvest_1.0.3 haven_2.5.1 xfun_0.35
## [31] crayon_1.5.2 RCurl_1.98-1.9 jsonlite_1.8.4
## [34] genefilter_1.76.0 survival_3.2-11 glue_1.6.2
## [37] gtable_0.3.1 gargle_1.2.1 zlibbioc_1.40.0
## [40] XVector_0.34.0 DelayedArray_0.20.0 Rttf2pt1_1.3.11
## [43] scales_1.2.1 mvtnorm_1.1-3 DBI_1.1.3
## [46] Rcpp_1.0.9 xtable_1.8-4 emdbook_1.3.12
## [49] bit_4.0.5 DT_0.26 htmlwidgets_1.6.0
## [52] httr_1.4.4 RColorBrewer_1.1-3 ellipsis_0.3.2
## [55] pkgconfig_2.0.3 XML_3.99-0.13 farver_2.1.1
## [58] sass_0.4.4 dbplyr_2.2.1 locfit_1.5-9.6
## [61] utf8_1.2.2 tidyselect_1.2.0 labeling_0.4.2
## [64] rlang_1.0.6 munsell_0.5.0 cellranger_1.1.0
## [67] tools_4.1.1 cachem_1.0.6 cli_3.4.1
## [70] generics_0.1.3 RSQLite_2.2.18 broom_1.0.2
## [73] evaluate_0.19 fastmap_1.1.0 yaml_2.3.6
## [76] babelgene_22.9 knitr_1.41 bit64_4.0.5
## [79] fs_1.5.2 KEGGREST_1.34.0 xml2_1.3.3
## [82] compiler_4.1.1 rstudioapi_0.14 png_0.1-8
## [85] reprex_2.0.2 geneplotter_1.72.0 bslib_0.4.2
## [88] stringi_1.7.6 highr_0.9 gdtools_0.2.4
## [91] lattice_0.20-44 Matrix_1.5-3 vctrs_0.5.1
## [94] pillar_1.8.1 lifecycle_1.0.3 jquerylib_0.1.4
## [97] data.table_1.14.6 bitops_1.0-7 R6_2.5.1
## [100] gridExtra_2.3 MASS_7.3-54 assertthat_0.2.1
## [103] withr_2.5.0 GenomeInfoDbData_1.2.7 parallel_4.1.1
## [106] hms_1.1.2 grid_4.1.1 coda_0.19-4
## [109] rmarkdown_2.19 googledrive_2.0.0 bbmle_1.0.25
## [112] numDeriv_2016.8-1.1 lubridate_1.9.0